***********************************
* Replication for "Patience Across Payday: The Role of Scarcity in Commitment Decisions"
* Data Cleaning
* Author: Holly Dykstra 
* January 2025
************************************ 
************************************ 

* Session setup: empty the workspace and turn off output paging.
clear all
set more off

***************************************************************
***************************************************************
***CHANGE TO CURRENT FOLDER
* Placeholder path: edit so that it points at the folder holding the data file.

cd "~/..../"

***************************************************************
***************************************************************
***LOCAL DATASETS
* data  = raw tab-delimited input read by this script
* stata = cleaned Stata dataset written at the end of this script

local data "data.tab"
local stata "stata.dta"

***************************************************************
*LOAD IN THE DATA
***************************************************************

* Read the tab-delimited raw file, replacing whatever is in memory.
import delimited using `data', delimiters(tab) clear

* Descriptive labels for the raw survey columns.
label var identifier        "Identifier"
label var durationinseconds "Duration in Seconds"
label var paycheck1         "Paycheck 1"
label var paycheck2         "Paycheck 2"
label var paycheck3         "Paycheck 3"
label var receiptday        "Commitment Date"
label var mainincome        "Main Income"
label var allincome         "All Sources of Income"
label var treatment         "Treatment"
label var choice            "Commitment"
label var reasontext        "Reason for Decision"
label var attention_check   "Attention Check"
label var free_text         "Comments on Survey"

* completed is 1 when a recorded date is present and the attention check
* equals 2, and 0 for every other row (it is never missing).
generate completed = (recordeddate != "" & attention_check == 2)
label var completed "Completed Part Two"

***************************************************************
*CREATE TREATMENT VARIABLES and LABELS
***************************************************************

* One 0/1 indicator per treatment arm (treat1 ... treat5); the k-th word of
* arm_names is the variable label of the k-th indicator.
local arm_names "d-11 d-8 d-5 d-2 d+1"
forvalues arm = 1/5 {
    local arm_name : word `arm' of `arm_names'
    generate treat`arm' = (treatment == `arm')
    label variable treat`arm' "`arm_name'"
}

* Label the categorical treatment variable itself; the value labels carry
* \textit{} markup as written.
label variable treatment "Treatment Group"
label define treatment_lbl 1 "\textit{d} -- 11" 2 "\textit{d} -- 8" 3 "\textit{d} -- 5" 4 "\textit{d} -- 2" 5 "\textit{d} + 1"
label values treatment treatment_lbl

* 1 for arms 1-4, 0 otherwise; defined only for completed respondents.
generate before_payday = inlist(treatment, 1, 2, 3, 4) if completed == 1
label variable before_payday "Before Payday"

***************************************************************
*CREATE DATE VARIABLES AND LABELS
***************************************************************

* --- Recorded date ---------------------------------------------------------
* Parse the "MDYhm" timestamp string and keep only the calendar day.
generate int recorded_day = dofc(clock(recordeddate, "MDYhm"))
format recorded_day %td

* Week, month and day-of-week derived from the daily date.
generate recordeddate_week = wofd(recorded_day)
format recordeddate_week %tw
generate recordeddate_month = mofd(recorded_day)
format recordeddate_month %tm
generate recordeddate_dayofweek = dow(recorded_day)

* Swap the original string column for the numeric daily date.
drop recordeddate
rename recorded_day recordeddate

label define dow_lbl 0 "Sunday" 1 "Monday" 2 "Tuesday" 3 "Wednesday" 4 "Thursday" 5 "Friday" 6 "Saturday"
label values recordeddate_dayofweek dow_lbl
label variable recordeddate           "Date"
label variable recordeddate_week      "Week"
label variable recordeddate_month     "Month"
label variable recordeddate_dayofweek "Day of the Week"

* --- Chosen date -----------------------------------------------------------
* Same steps for the chosen date, which is parsed with a date-only "MDY" mask.
generate int chosen_day = dofc(clock(chosendate, "MDY"))
format chosen_day %td

generate chosendate_week = wofd(chosen_day)
format chosendate_week %tw
generate chosendate_month = mofd(chosen_day)
format chosendate_month %tm
generate chosendate_dayofweek = dow(chosen_day)

drop chosendate
rename chosen_day chosendate

label define dow_lbl2 0 "Sunday" 1 "Monday" 2 "Tuesday" 3 "Wednesday" 4 "Thursday" 5 "Friday" 6 "Saturday"
label values chosendate_dayofweek dow_lbl2
label variable chosendate           "Chosen Treatment Date"
label variable chosendate_week      "Chosen Treatment Week"
label variable chosendate_month     "Chosen Treatment Month"
label variable chosendate_dayofweek "Chosen Treatment Day of the Week"

***************************************************************
*CREATE DEMOGRAPHIC VARIABLES AND LABELS
***************************************************************

* Indicator variables start at 0 for completed respondents and missing for
* everyone else; the replace statements below switch them to 1.
foreach flag in white married fulltime bachelors education_hs age_combined {
    generate `flag' = cond(completed == 1, 0, .)
}

* --- Race -------------------------------------------------------------------
replace white = 1 if race == 0
label variable race "Race and/or Ethnic Background"
label define race1 0 "Caucasian" 1 "Hispanic, Latino, or Spanish origin" 2 "Black or African American" 3 "American Indian or Alaska Native" 4 "Asian" 5 "Native American or Pacific Islander" 6 "Other"
label values race race1
label variable white "White Indicator"

* --- Marital status -----------------------------------------------------------
replace married = 1 if marriage == 1
label variable marriage "Marital Status"
label define marriage1 0 "Single" 1 "Married or Domestic Partnership" 2 "Widowed" 3 "Divorced" 4 "Separated"
label values marriage marriage1
label variable married "Married Indicator"

* --- Employment ---------------------------------------------------------------
replace fulltime = 1 if employment_status == 0
label variable employment_status "Employment Status"
label define employment_status1 0 "Employed Full Time" 1 "Employed Part Time" 2 "Unemployed and Looking" 3 "Unemployed and Not Looking" 4 "Student" 5 "Retired" 6 "Homemaker" 7 "Self-employed" 8 "Unable to Work"
label values employment_status employment_status1
label variable fulltime "Full-Time Employment Indicator"

* --- Education ------------------------------------------------------------------
replace bachelors = 1 if education == 4
label variable education "Education"
label define education1 0 "Less than High School" 1 "High School or Equivalent" 2 "Some College" 3 "Associate's Degree" 4 "Bachelor's Degree" 5 "Master's Degree" 6 "Professional Degree" 7 "Doctorate"
label values education education1
label variable bachelors "Bachelor's Degree Indicator"

* Collapse the eight education codes into five groups (completed respondents
* only); any code not listed below, including missing, stays in group 0.
generate education_group = 0 if completed == 1
replace education_group = 1 if completed == 1 & education == 1
replace education_group = 2 if completed == 1 & inlist(education, 2, 3)
replace education_group = 3 if completed == 1 & education == 4
replace education_group = 4 if completed == 1 & inlist(education, 5, 6, 7)
label variable education_group "Educational Status"
label define education_group1 0 "Less than High School" 1 "High School or Equivalent" 2 "Some College or Associate's Degree" 3 "Bachelor's Degree" 4 "Graduate Degree"
label values education_group education_group1

* Flags education groups 0 and 1, i.e. high school or less.
* NOTE(review): the variable label says "Did not Complete HS", but group 1
* ("High School or Equivalent") is also flagged - confirm which is intended.
replace education_hs = 1 if inlist(education_group, 0, 1)
label variable education_hs "Did not Complete HS Indicator"

* --- Age ------------------------------------------------------------------------
* The raw age column is read as a string; entries that real() cannot convert
* become missing and are set to 65 for completed respondents.
* NOTE(review): presumably the non-numeric entry is a top-coded "65+" style
* answer - confirm against the raw data.
rename age age_raw
generate age = real(age_raw)
replace age = 65 if completed == 1 & missing(age)
drop age_raw
label variable age "Age in Years"

replace age_combined = 1 if age >= 35 & age <= 100
label variable age_combined "Age Indicator"
label define age_lbl 0 "Under 35" 1 "Age 35+"
label values age_combined age_lbl

* Ten-year age bands for completed respondents; ages outside every listed
* band stay in group 0.
generate age_group = 0 if completed == 1
replace age_group = 1 if completed == 1 & inrange(age, 25, 34)
replace age_group = 2 if completed == 1 & inrange(age, 35, 44)
replace age_group = 3 if completed == 1 & inrange(age, 45, 54)
replace age_group = 4 if completed == 1 & inrange(age, 55, 64)
replace age_group = 5 if completed == 1 & inrange(age, 65, 100)
label variable age_group "Age Group"
label define age_group1 0 "18-24" 1 "25-34" 2 "35-44" 3 "45-54" 4 "55-64" 5 "65+"
label values age_group age_group1

* --- Gender ---------------------------------------------------------------------
label variable gender "Female"
label define gender1 0 "Male" 1 "Female"
label values gender gender1

***************************************************************
*CREATE SCARCITY VARIABLE AND LABELS
***************************************************************

* Value labels and variable label for the raw low-on-cash question.
label define lowcash_lbl 0 "Yes" 1 "Somewhat" 2 "No"
label values low_on_cash lowcash_lbl
label variable low_on_cash "Are you low on cash right now?"

* scarcity starts at 0 for completed respondents and is set to 1 whenever
* low_on_cash is coded 1 or 2.
* NOTE(review): under lowcash_lbl above, codes 1 and 2 read "Somewhat" and
* "No", yet scarcity == 1 is labelled "Low on cash". Either the value label
* or this rule appears to be reversed - confirm the raw coding of low_on_cash.
generate scarcity = 0 if completed == 1
replace scarcity = 1 if inlist(low_on_cash, 1, 2)

label variable scarcity "Scarcity Indicator"
label define scarcity1 0 "Not low on cash" 1 "Low on cash"
label values scarcity scarcity1

***************************************************************
*CREATE INCOME VARIABLES and LABELS
***************************************************************

* Value labels for the 19 raw income brackets (codes 0-18).
* NOTE(review): the labels below contain an unescaped "$" while the
* income_combined labels further down escape it; confirm the unescaped form
* displays as intended in the output.
label define income_lbl 0 "Less than $5,000"
label define income_lbl 1 "$5,000 - $7,499", add
label define income_lbl 2 "$7,500 - $9,999", add
label define income_lbl 3 "$10,000 - $12,499", add
label define income_lbl 4 "$12,500 - $14,999", add
label define income_lbl 5 "$15,000 - $19,999", add
label define income_lbl 6 "$20,000 - $24,999", add
label define income_lbl 7 "$25,000 - $29,999", add
label define income_lbl 8 "$30,000 - $34,999", add
label define income_lbl 9 "$35,000 - $39,999", add
label define income_lbl 10 "$40,000 - $49,999", add
label define income_lbl 11 "$50,000 - $59,999", add
label define income_lbl 12 "$60,000 - $74,999", add
label define income_lbl 13 "$75,000 - $99,999", add
label define income_lbl 14 "$100,000 - $149,999", add
label define income_lbl 15 "$150,000 - $249,999", add
* Lower bound corrected from $249,999: bracket 15 ends at $249,999 and
* income_group below treats codes 16-18 as "$250,000 and Over".
label define income_lbl 16 "$250,000 - $499,999", add
label define income_lbl 17 "$500,000 - $1 million", add
label define income_lbl 18 "Over $1 million", add

label values income income_lbl
label variable income "Income"

* Six-category income grouping. Completed respondents start in group 0
* (brackets 0-6, and any missing income); higher brackets are reassigned below.
gen income_group = 0 if completed == 1
replace income_group = 1 if inlist(income, 7, 8, 9, 10)
replace income_group = 2 if inlist(income, 11, 12)
replace income_group = 3 if income == 13
replace income_group = 4 if inlist(income, 14, 15)
replace income_group = 5 if inlist(income, 16, 17, 18)

label variable income_group "Income Categories"
label define income_group2 0 "Less than $25,000" 1 "$25,000 - $49,999" 2 "$50,000 - $74,999" 3 "$75,000 - $99,999" 4 "$100,000 - $249,999" 5 "$250,000 and Over"
label values income_group income_group2

* Three-category version: groups 0-1, groups 2-3, groups 4-5.
gen income_combined = 0 if completed == 1
replace income_combined = 1 if inlist(income_group, 2, 3)
replace income_combined = 2 if inlist(income_group, 4, 5)

label variable income_combined "Income Groups"
label define income_combined1 0 "Less than \\$50,000" 1 "\\$50,000 - \\$99,999" 2 "Over \\$100,000"
label values income_combined income_combined1

* Indicator for income_group 1 ($25,000 - $49,999).
gen inc25_49 = 0 if completed == 1
replace inc25_49 = 1 if income_group == 1

label variable inc25_49 "Income: $25,000 - $49,999 Indicator"

* Value labels for the main source of income.
label define mainincome_lbl 0 "Wages and Salaries"
label define mainincome_lbl 1 "Self-Employment", add
label define mainincome_lbl 2 "Amazon Mechanical Turk", add
label define mainincome_lbl 3 "Unemployment Compensation", add
label define mainincome_lbl 4 "Social Security or Disability", add
label define mainincome_lbl 5 "Public Assistance or Welfare", add
label define mainincome_lbl 6 "Retirement Income", add
label define mainincome_lbl 7 "Other income", add

label values mainincome mainincome_lbl
label variable mainincome "Main Source of Income"

* allincome_dum = 1 when a respondent lists more than one income source whose
* code is not 2, and 0 otherwise (defined for every row).
* NOTE(review): assumes allincome uses the same codes as mainincome, where
* 2 = "Amazon Mechanical Turk" - confirm against the survey export.
gen allincome_dum = 0

* Split the comma-separated list in allincome into numeric variables
* temp1, temp2, ... (one per listed source).
split allincome, parse(", ") destring generate(temp)

* Count, row by row, the listed sources that are neither 2 nor missing.
* This vectorised count replaces an explicit loop over observations and
* gives the same result.
tempvar n_other
gen int `n_other' = 0
foreach var of varlist temp* {
    quietly replace `n_other' = `n_other' + (`var' != 2 & `var' != .)
}

* More than one such source sets allincome_dum to 1.
quietly replace allincome_dum = 1 if `n_other' > 1

* Remove the helper variables; the counter is dropped explicitly so it cannot
* end up in the dataset saved at the end of this do-file.
drop `n_other'
drop temp*
label variable allincome_dum "Multiple Non-MTurk Income Sources Indicator"

***************************************************************
*CREATE EMERGENCY VARIABLES and LABELS
***************************************************************

* Value labels for the emergency-funds question (codes 0-3).
label define emergency_lbl 0 "I could easily get the money"
label define emergency_lbl 1 "I could get the money, but it would involve some sacrifices", add
label define emergency_lbl 2 "I would have to do something drastic to raise the money", add
label define emergency_lbl 3 "I don't think I could raise the money", add

label values emergency_money emergency_lbl
label variable emergency_money "Ability to Raise $2,000 for an Emergency"

* emergency_canraise: 1 whenever emergency_money is below 3; otherwise 0 for
* completed respondents and missing for everyone else.
generate emergency_canraise = 1 if emergency_money < 3
replace emergency_canraise = 0 if completed == 1 & missing(emergency_canraise)

* emergency_cannotraise is the exact complement (missing stays missing).
generate emergency_cannotraise = 1 - emergency_canraise

label define emerg_lbl 0 "Cannot Raise \\$2,000" 1 "Can Raise \\$2,000"
label values emergency_canraise emerg_lbl
label variable emergency_canraise "Can Raise \\$2,000"

label define emerg_lbl2 0 "Can Raise \\$2,000" 1 "Cannot Raise \\$2,000"
label values emergency_cannotraise emerg_lbl2
label variable emergency_cannotraise "Cannot Raise \\$2,000"

* Credit-card variables are only labelled here.
label variable credit_number "Number of Credit Cards"
label variable credit_amount "Amount of Credit"

***************************************************************
*CREATE CFPB FINANCIAL WELLBEING VARIABLES AND LABEL
***************************************************************

* Flip items 1, 2, 4 and 8, which arrive as cfpb#_reverse: 4->0, 3->1, 1->3,
* 0->4. Completed respondents default to 2, which covers a raw value of 2 and
* any value not listed (including missing). The raw column is then dropped.
foreach v in 1 2 4 8 {

    gen cfpb`v' = 2 if completed == 1
    replace cfpb`v' = 0 if cfpb`v'_reverse == 4
    replace cfpb`v' = 1 if cfpb`v'_reverse == 3
    replace cfpb`v' = 3 if cfpb`v'_reverse == 1
    replace cfpb`v' = 4 if cfpb`v'_reverse == 0

    drop cfpb`v'_reverse
}

* Raw score: row sum of every variable whose name starts with "cfpb" at this
* point, for completed respondents only. The condition names completed in
* full rather than relying on Stata's variable-name abbreviation.
egen cfpb_raw = rowtotal(cfpb*) if completed == 1

* Lookup tables: position i holds raw score i-1 and the score it converts to
* for each of the two age groups used below.
matrix cfpb_raw_list = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)

matrix cfpb_under62_list = (14, 19, 22, 25, 27, 29, 31, 32, 34, 35, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 58, 59, 60, 62, 63, 65, 66, 68, 69, 71, 73, 75, 78, 81, 86)

matrix cfpb_62plus_list = (14, 20, 24, 26, 29, 31, 33, 35, 36, 38, 39, 41, 42, 44, 45, 46, 48, 49, 50, 52, 53, 54, 56, 57, 58, 60, 61, 63, 64, 66, 67, 69, 71, 73, 75, 77, 79, 82, 84, 88, 95)

* Convert the raw score with the table matching the respondent's age. The two
* age conditions are mutually exclusive (age < 62 versus age >= 62).
gen cfpb_total = 0 if completed == 1
forvalues i = 1/41 {

    replace cfpb_total = cfpb_under62_list[1,`i'] if (cfpb_raw == cfpb_raw_list[1,`i'] & age < 62)
    replace cfpb_total = cfpb_62plus_list[1,`i'] if (cfpb_raw == cfpb_raw_list[1,`i'] & age >= 62)

}

*Create standardized version
* z-score of cfpb_total, using the mean and SD over completed respondents.
egen cfpb_total_mean = mean(cfpb_total) if completed==1
egen cfpb_total_sd = sd(cfpb_total) if completed==1
gen financialwellbeing = (cfpb_total - cfpb_total_mean) / cfpb_total_sd
label variable financialwellbeing "Financial Well-Being"

* Drop every cfpb* variable (items, raw score, converted score, mean and SD);
* only financialwellbeing is kept.
drop cfpb*

***************************************************************
*CREATE PERCEIVED STRESS SCALE AND LABEL
***************************************************************

* Flip items 4, 5, 7 and 8, which arrive as cohen#_reverse. Raw values
* 0, 1, 3, 4 map to 4, 3, 1, 0; any other value (including 2 and missing)
* becomes 2 for completed respondents and missing for everyone else.
foreach item in 4 5 7 8 {
    generate cohen`item' = cond(inlist(cohen`item'_reverse, 0, 1, 3, 4), 4 - cohen`item'_reverse, cond(completed == 1, 2, .))
    drop cohen`item'_reverse
}

* Scale score: row sum of every cohen* variable, completed respondents only.
egen stress = rowtotal(cohen*) if completed == 1
label variable stress "Perceived Stress Scale"

* The individual items are not kept.
drop cohen*

***************************************************************
*LABEL TEXT CATEGORIZATION VARIABLES
***************************************************************

* Variable labels for the text-category columns without the l_ prefix.
label var moneysoon_gpt   "Text Category: Money Sooner"
label var stable_gpt      "Text Category: Stable"
label var need_gpt        "Text Category: Need"
label var budget_gpt      "Text Category: Budget"
label var unexpected_gpt  "Text Category: Unexpected"
label var regexpenses_gpt "Text Category: Regular Expenses"
label var onetimeitem_gpt "Text Category: One Time Item"
label var safer_gpt       "Text Category: Safer"
label var random_gpt      "Text Category: Random"
label var unclear_gpt     "Text Category: Unclear"

* Variable labels for the l_-prefixed columns, each marked "(Commitment)".
label var l_timing_gpt      "Text Category: Timing (Commitment)"
label var l_need_gpt        "Text Category: Need (Commitment)"
label var l_impatient_gpt   "Text Category: Impatient (Commitment)"
label var l_budget_gpt      "Text Category: Budget (Commitment)"
label var l_unexpected_gpt  "Text Category: Unexpected (Commitment)"
label var l_regexpenses_gpt "Text Category: Regular Expenses (Commitment)"
label var l_onetimeitem_gpt "Text Category: One Time Item (Commitment)"
label var l_delayedg_gpt    "Text Category: Delayed Gratification (Commitment)"
label var l_delayedb_gpt    "Text Category: Delayed Bad News (Commitment)"
label var l_safer_gpt       "Text Category: Safer (Commitment)"
label var l_random_gpt      "Text Category: Random (Commitment)"
label var l_unclear_gpt     "Text Category: Unclear (Commitment)"

***************************************************************
*SAVE CLEANED DATASET
***************************************************************

* Write the cleaned data to the file named in local macro stata, overwriting
* any existing copy.
save "`stata'", replace
